This markdown follows differential expression analyses from deseq2_analyses.Rmd. DEGs from P70 (39) pre-cystic data are used as input for signatureSearch. Results are annotated with Drug Repurposing Hub and onSides data.
Data Sets - ’39 / Pre-Cystic P70: GSE149739
- ’19 / Cystic P21: GSE134719
- ’56 / Cystic P28: GSE69556
Load libraries
library(signatureSearch)
library(ggplot2)
library(ggalluvial)
#library(readr)
library(signatureSearch)
library(ExperimentHub); library(rhdf5)
library(here)
library(dplyr)
library(stringr)
source(here("code", "functions.R"))
Pull LINCS data from Experiment Hub
eh <- ExperimentHub()
cmap <- eh[["EH3223"]]; cmap_expr <- eh[["EH3224"]] #cmap data locations in experimenthub
lincs <- eh[["EH3226"]]; lincs_expr <- eh[["EH3227"]] #lincs data locations in experimenthub
h5ls(lincs) #reading hdf5 file
## group name otype dclass dim
## 0 / assay H5I_DATASET FLOAT 12328 x 45956
## 1 / colnames H5I_DATASET STRING 45956 x 1
## 2 / rownames H5I_DATASET STRING 12328 x 1
db_path <- system.file("extdata", "sample_db.h5", package = "signatureSearch")
Load in top up and down DEGs
hom_p70_UP <- read.csv(here("res", "deseq2_outputs", "p70_signature_UP.csv"))
hom_p70_DOWN <- read.csv(here("res", "deseq2_outputs", "p70_signature_DOWN.csv"))
Run signatureSearch, filter for kidney cell lines
gess_kidneycells_p70 <- ssRun(upgenes = hom_p70_UP$entrezgene_id, downgenes = hom_p70_DOWN$entrezgene_id, cells = "kidney")
## 100 / 100 genes in up set share identifiers with reference database
## 31 / 31 genes in down set share identifiers with reference database
Total results (inversely related)
dim(gess_kidneycells_p70)
## [1] 730 16
head(gess_kidneycells_p70, 30)
## # A tibble: 30 × 16
## pert cell type trend WTCS WTCS_…¹ WTCS_…² NCS Tau TauRe…³ NCSct
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl>
## 1 etoposide HA1E trt_… down -0.713 1.25e-5 1.25e-4 -2.03 -99.6 2241 -1.78
## 2 SA-84869 NKDBA trt_… down -0.699 1.31e-5 1.25e-4 -1.69 NA 229 -1.40
## 3 pyrvinium… HA1E trt_… down -0.663 1.47e-5 1.25e-4 -1.88 -99.6 2241 -1.69
## 4 BRD-K9231… HA1E trt_… down -0.663 1.47e-5 1.25e-4 -1.88 -99.6 2241 -1.38
## 5 SA-1919710 NKDBA trt_… down -0.662 1.47e-5 1.25e-4 -1.60 NA 229 -1.51
## 6 BRD-K9519… HA1E trt_… down -0.646 1.54e-5 1.25e-4 -1.84 -99.0 2241 -1.52
## 7 amsacrine HA1E trt_… down -0.639 1.57e-5 1.25e-4 -1.81 -98.8 2241 -1.59
## 8 BRD-K6087… HA1E trt_… down -0.634 1.59e-5 1.25e-4 -1.80 -98.8 2241 -1.61
## 9 LY-294002 NKDBA trt_… down -0.630 1.61e-5 1.25e-4 -1.52 NA 229 -1.07
## 10 thapsigar… HA1E trt_… down -0.628 1.62e-5 1.25e-4 -1.79 -98.4 2241 -1.56
## # … with 20 more rows, 5 more variables: N_upset <int>, N_downset <int>,
## # t_gn_sym <chr>, MOAss <chr>, PCIDss <chr>, and abbreviated variable names
## # ¹​WTCS_Pval, ²​WTCS_FDR, ³​TauRefSize
write.csv(gess_kidneycells_p70, file = here("res", "sigsearch_outputs", "gess_kidneycells_p70.csv"))
LINCS Drug Repurposing Hub, contains drug names, MOAs, original indications, and targets
repurpHub <- read.delim(here("data", "Repurposing_Hub_export.txt"))
rHub_p70 <- dplyr::inner_join(gess_kidneycells_p70, repurpHub, by= c("pert" = "Name"))
P70/’39 Already-Launched Drugs
rhub_launched_p70 <- dplyr::filter(rHub_p70, Phase == "Launched")
write.csv(rhub_launched_p70, file = here("res", "sigsearch_outputs", "rhub_launcheddrugs_p70.csv"))
Alluvial plot of drugs-targets with just launched drugs
rhub_launched_p70_top30 <- dplyr::slice_min(rhub_launched_p70, order_by = Tau, n = 30)
rhub_launched_p70_top_septarget <- tidyr::separate_rows(rhub_launched_p70_top30, Target, sep = ", ")
#remove drugs with empty 'target' field
rhub_launched_p70_top_septarget <- dplyr::filter(rhub_launched_p70_top_septarget, Target != "")
plot
target_alluvial <- ggplot(data = rhub_launched_p70_top_septarget,
aes(axis1 = pert, axis2 = Target)) +
geom_alluvium(color = "black", aes(fill = WTCS)) + # , colour = trend)) +
geom_stratum(width = 3/12, color = "black") + #the border of the stratum
scale_x_discrete(expand = c(.07, .07)) +
scale_y_discrete(expand = c(.007, .007)) +
scale_fill_viridis_c(aesthetics = c("colour", "fill")) +
geom_text(stat = "stratum", size = 3.5, fontface = "bold", aes(label = after_stat(stratum))) +
labs(title = "Targets of Top 20 Launched Drugs") +
theme_void()
target_alluvial
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
MOA’s separated, string wrap labels
rhub_launched_p70_top_moasep <- tidyr::separate_rows(rhub_launched_p70_top30, MOA, sep = ",")
rhub_launched_p70_top_moasep$MOA <- stringr::str_wrap(rhub_launched_p70_top_moasep$MOA, width = 20)
rhub_launched_p70_top_moasep$Disease.Area <- stringr::str_wrap(rhub_launched_p70_top_moasep$Disease.Area, width = 30)
plot
moa_alluvial <- ggplot(data = rhub_launched_p70_top_moasep,
aes(axis1 = pert, axis2 = MOA)) +
geom_alluvium(color = "black", aes(fill = WTCS)) + # , colour = trend)) +
geom_stratum(width = 3/12, color = "black") + #the border of the stratum
scale_x_discrete(expand = c(.07, .07)) +
scale_y_discrete(expand = c(.007, .007)) +
scale_fill_viridis_c(aesthetics = c("colour", "fill")) +
geom_text(stat = "stratum", size = 3.5, fontface = "bold", aes(label = after_stat(stratum))) +
labs(title = "MOA of Top 30 Launched Drugs") +
theme_void()
moa_alluvial
ggsave(here("res", "sigsearch_outputs", "moa_alluvial_wtcs_220819.pdf"), moa_alluvial, width = 10, height = 15)
P70/’39 Already-Launched Drugs
rhub_launched_p70 <- dplyr::filter(rHub_p70, Phase == "Launched")
Save
write.csv(rHub_p70, file = here("res", "sigsearch_outputs", "ssresults_rHub_p70.csv"))
Code adapted from Jen Fisher
Data: Drugs@FDA database was downloaded in August 2022 from https://www.fda.gov/drugs/drug-approvals-and-databases/drugsfda-data-files (Accessed date: August 2022). From the information about drugs (i.e, marketing status, application information, etc.), we created a table with information including the method of drug delivery, dosage, active ingredients, and FDA approval (FDA_products_status_220816.csv) FDA terminology https://www.fda.gov/drugs/drug-approvals-and-databases/drugsfda-glossary-terms#:~:text=Tentative%20Approval&text=FDA%20delays%20final%20approval%20of,market%20the%20generic%20drug%20product. Drugs were filtered for unique names, regardless of form (i.e. injectible, solution, tablet, etc) and further filtered for Marketing Statuses that were not Discontinued (leaving Prescription, Over-the-counter, and Tentative Approval, due to patents)
fda_marketing_status <- read.delim(here("data/fda_data", "MarketingStatus.txt"))
frda_product_info <- read.delim(here("data/fda_data","Products.txt"))
fda_marketing_status$ID<- paste(fda_marketing_status$ApplNo, fda_marketing_status$ProductNo, sep= "_")
frda_product_info$ID<- paste(frda_product_info$ApplNo, frda_product_info$ProductNo, sep= "_")
Add marketing status description to marketing status data
marketing_lookup <- read.delim(here("data/fda_data", "MarketingStatus_Lookup.txt"))
fda_marketing_status <- merge(fda_marketing_status, marketing_lookup, by = "MarketingStatusID")
Combine drug info and status
fda_data <- merge(frda_product_info, fda_marketing_status, by = "ID")
write.csv(fda_data, file = here("data", "FDA_products_status_220816.csv"))
fda_data <- read.csv(here("data", "FDA_products_status_220816.csv"), row.names = 1)
#only unique drugs, regardless of concentration or form
fda_approved <- dplyr::filter(fda_data, MarketingStatusDescription != "Discontinued")
fda_approved <- dplyr::distinct(fda_approved, ActiveIngredient, .keep_all = TRUE)
write.csv(fda_approved, file = here("data", "FDA_approved_unique_ingredients.csv"))
Repurposing hub ‘launched’ drugs
rhub_launched_p70 <- read.csv(here("res", "sigsearch_outputs", "rhub_launcheddrugs_p70.csv"), row.names = 1)
fda_approved <- read.csv(here("data", "FDA_approved_unique_ingredients.csv"), row.names = 1)
Test for FDA approval
fda_approved_p70 <- FDA_APPROVAL_CHECK(rhub_launched_p70$pert, fda_approved)
fda_approved_p70 <- mutate(rhub_launched_p70, fda_approval = fda_approved_p70)
fda_approved_p70 <- dplyr::filter(fda_approved_p70, fda_approval == TRUE)
#results for both kidney cell lines for some drugs
fda_approved_p70 <- dplyr::distinct(fda_approved_p70, pert, .keep_all = TRUE)
save FDA-approved drug candidates
saveRDS(fda_approved_p70, file = here("res", "sigsearch_outputs", "p70_fdaapproved_drugs.rds"))
write.csv(fda_approved_p70, file = here("res", "sigsearch_outputs", "p70_fdaapproved_drugs.csv"))
MOA’s separated, string wrap labels
fda_approved_p70_moa <- tidyr::separate_rows(fda_approved_p70, MOA, sep = ",")
fda_approved_p70_moa_top <- fda_approved_p70_moa %>% dplyr::slice_min(order_by = NCS, n = 42) %>% mutate(pert = forcats::fct_reorder(pert, NCS))
fda_approved_p70_moa_top$MOA <- stringr::str_trunc(fda_approved_p70_moa_top$MOA, width = 38)
#####MOA plot
moa_alluvial <- ggplot(fda_approved_p70_moa_top,
aes(axis1 = pert, axis2 = MOA)) +
geom_alluvium(color = "black", aes(fill = NCS)) + # , colour = trend)) +
geom_stratum(width = 5/16, color = "black") + #the border of the stratum
scale_y_discrete(limits = ) +
scale_fill_viridis_c(aesthetics = c("colour", "fill"), direction = -1) +
geom_text(stat = "stratum", size = 3.25, fontface = "bold", aes(label = after_stat(stratum))) +
#labs(title = "MOA of Top FDA-Approved Drugs") +
theme(axis.title.x = element_blank(), plot.title = element_text(face = "bold", hjust = 0.5), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) + ggtitle(label = "MOA of Top FDA-Approved Drugs") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank() )
moa_alluvial
ggsave(here("res", "sigsearch_outputs", "moa_fda_alluvial.png"), moa_alluvial, width = 12, height = 7, dpi = 600)
Frequency of MOA’s
moa_freq <- as.data.frame(table(fda_approved_p70_moa$MOA))
other <- moa_freq$Var1[moa_freq$Freq==1]
#other <- cbind(Var1 = as.character(other), Freq = rep("other", length(other)))
multi_moa <- dplyr::filter(moa_freq, Freq > 1)
multi_moa$Var1 <- as.character(multi_moa$Var1)
moa_freqs <- rbind(multi_moa, c(Var1 = "other", Freq = length(other)))
moa_freqs$Freq <- as.numeric(moa_freqs$Freq)
#moa_freq$Var1 <- stringr::str_replace(moa_freq$Var1 %in% other == "other")
#moa_freq <- dplyr::filter(moa_freq, Freq > 1)
# Stacked + percent
moa_freqs %>% mutate(MOA = forcats::fct_reorder(Var1, Freq)) %>%
ggplot(aes(fill=MOA, y=Freq, x="MOA")) +
geom_bar(position="fill", stat="identity") +
scale_fill_viridis_d(aesthetics = c("colour", "fill"))
#visualize
moa_freq <- dplyr::filter(moa_freq, Freq > 2)
moa_freq$Var1 <- str_wrap(moa_freq$Var1, width = 25)
moa_frq <- moa_freq %>% mutate(MOA = forcats::fct_reorder(Var1, desc(Freq))) %>% ggplot(aes(x = MOA, y = Freq, fill = Freq)) +
geom_bar(stat = "identity") +
scale_fill_viridis_c(aesthetics = c("colour", "fill")) +
theme(axis.text.x = element_text(size = 13.5, face = "bold", angle = 50, vjust = 0.97, hjust=0.95), axis.title.x = element_blank(), plot.title = element_text(size = 14, face = "bold", hjust = 0.5)) + ggtitle(label = "Most Frequent Mechanisms of Action") +
theme(plot.margin = unit(c(.2,.2,.2,1.5), "cm"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
moa_frq
ggsave(filename = here("res/sigsearch_outputs", "p70_fdaapproved_moa_freq.pdf"), moa_frq, height = 5.5, width = 9.5)
Frequency of original disease area indications
indication_freq <- as.data.frame(table(fda_approved_p70_moa$Disease.Area))
indication_freq <- dplyr::filter(indication_freq, Freq > 2)
plot
#visualize
indication_plot <- indication_freq %>% mutate(indication = forcats::fct_reorder(Var1, desc(Freq))) %>% ggplot(aes(x = indication, y = Freq, fill = Freq)) +
geom_bar(stat = "identity") +
scale_fill_viridis_c(aesthetics = c("colour", "fill")) +
theme(axis.text.x = element_text(size = 13.5, face = "bold", angle = 50, vjust = 0.92, hjust=0.85), axis.title.x = element_blank(), plot.title = element_text(size = 14, face = "bold", hjust = 0.5)) + ggtitle(label = "Most Frequent Disease Area Indications") +
theme(plot.margin = unit(c(.2,.2,.2,1), "cm"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
indication_plot
ggsave(filename =here("res/sigsearch_outputs", "p70_fdaapproved_origindication_freq.pdf"), indication_plot, height = 5.5, width = 8)
R.Version()
## $platform
## [1] "x86_64-apple-darwin17.0"
##
## $arch
## [1] "x86_64"
##
## $os
## [1] "darwin17.0"
##
## $system
## [1] "x86_64, darwin17.0"
##
## $status
## [1] ""
##
## $major
## [1] "4"
##
## $minor
## [1] "2.0"
##
## $year
## [1] "2022"
##
## $month
## [1] "04"
##
## $day
## [1] "22"
##
## $`svn rev`
## [1] "82229"
##
## $language
## [1] "R"
##
## $version.string
## [1] "R version 4.2.0 (2022-04-22)"
##
## $nickname
## [1] "Vigorous Calisthenics"
installed.packages()[names(sessionInfo()$otherPkgs), "Version"]
## signatureSearchData stringr dplyr
## "1.10.0" "1.4.1" "1.0.10"
## here rhdf5 ExperimentHub
## "1.0.1" "2.40.0" "2.4.0"
## AnnotationHub BiocFileCache dbplyr
## "3.4.0" "2.4.0" "2.2.1"
## ggalluvial ggplot2 signatureSearch
## "0.12.3" "3.4.0" "1.10.0"
## SummarizedExperiment Biobase GenomicRanges
## "1.26.1" "2.56.0" "1.48.0"
## GenomeInfoDb IRanges S4Vectors
## "1.32.4" "2.30.1" "0.34.0"
## BiocGenerics MatrixGenerics matrixStats
## "0.42.0" "1.8.1" "0.62.0"
## Rcpp
## "1.0.9"